Generate a map of the distribution of M. assimilis tissue samples across Australia, colour-coded by collection date.
# load libraries
library(oz)
library(readxl)
# load dataset
samples <- read_excel("Samples-Geo-Date.xlsx")
# plot the outline of Australia as the base map
oz()
# one fill colour per collection-date class 1-7, light to dark blue
# (ordering of classes follows the spreadsheet columns -- confirm direction)
date.cols <- c("#85C1E9", "#5DADE2", "#3498DB", "#2E86C1",
               "#2874A6", "#21618C", "#1B4F72")
# overlay one triangle (pch = 24) per sample for each date class;
# the spreadsheet stores coordinates in columns Latitude-1..7 / Longitude-1..7,
# so the seven copy-pasted points() calls collapse to a single loop
for (i in seq_along(date.cols)) {
  points(samples[[paste0("Longitude-", i)]],
         samples[[paste0("Latitude-", i)]],
         col = date.cols[i], bg = date.cols[i], pch = 24)
}
Plotting the parameters calculated for SNP filtering, including: coverage depth, individual missingness, average site depth, and site missingness.
# load libraries
library(ggplot2)
library(readxl)
# load dataset
# this is an excel file containing the coverage of each individual sample as
# estimated using SamTools
coverage <- read_excel("coverage.xlsx")
# plot a histogram of the distribution of sample coverage using ggplot2
# NOTE: the column is referenced by name inside aes() rather than via
# coverage$`Coverage Depth`, so ggplot evaluates it in the data context
# (using $ inside aes() breaks faceting and is a documented anti-pattern)
ggplot(coverage, aes(`Coverage Depth`)) +
  geom_histogram(breaks = seq(0, 8, by = 1), col = "grey1",
                 aes(fill = ..count..)) +
  scale_fill_gradient("Count", high = "paleturquoise4",
                      low = "paleturquoise1") +
  labs(x = "Coverage Depth", y = "Frequency") +
  theme_bw()
# load libraries
library(ggplot2)
library(readxl)
# load dataset
# this is an excel file containing the average depth of 100,000 random sites
# trimmed of sites with average depth >8 as these sites drag the histogram out
# such that the resolution is unreadable
meansitedepth <- read_excel("mean-site-depth-trimmed-2.xlsx")
# set the variable sitedepth
sitedepth <- meansitedepth$`Mean Site Depth`
# create a histogram
hist(sitedepth, breaks = 100, main = "Average Site Depth",
     xlab = "Average Site Depth")
# calculate the average depth; named mean.depth rather than `mean` so the
# base function mean() is not shadowed (a numeric object called `mean`
# breaks later calls such as tapply(..., mean))
mean.depth <- mean(sitedepth)
# calculate the standard deviation
SD <- sd(sitedepth)
# calculate 3 standard deviations
sdevs <- SD * 3
# set the cutoff for average site depth: anything above mean + 3 SD is
# usually a sequencing artefact or paralog
cutoff <- mean.depth + sdevs
# add lines representing the average site depth (red) and the cutoff site
# depth (blue) to the histogram
abline(v = mean.depth, col = "red", lwd = 2)
abline(v = cutoff, col = "blue", lwd = 2)
# load libraries
library(ggplot2)
library(readxl)
# load dataset
# this is an excel file containing the individual missingness values for all
# samples across all 25 chromosomes
missingness <- read_excel("missingness.xlsx")
# plot missingness as a histogram binned in 0.1 steps from 0 to 0.8
# NOTE(review): aes(missingness) is evaluated first in the data context --
# presumably the spreadsheet has a column literally named "missingness";
# confirm, otherwise ggplot silently falls back to the data frame object
# itself from the calling environment
ggplot (missingness, aes(missingness)) + geom_histogram(breaks=seq(0,0.8, by =0.1), col="grey1", aes(fill=..count..)) + scale_fill_gradient("Count", high = "red4", low = "palevioletred2") + labs(x="Individual Missingness", y = "Frequency") + theme_bw()
# load libraries
library(ggplot2)
library(readxl)
# load dataset
# this is an excel file containing the missingness of 100,000 sites across
# all 25 chromosomes
sitemissingness <- read_excel("site-missingness.xlsx")
# plot site missingness as a histogram binned in 0.1 steps
# NOTE: the column is referenced by its (backtick-quoted) name inside aes()
# rather than via sitemissingness$`site-missingness`, so ggplot evaluates
# it in the data context
ggplot(sitemissingness, aes(`site-missingness`)) +
  geom_histogram(breaks = seq(0, 1, by = 0.1), col = "grey1",
                 aes(fill = ..count..)) +
  scale_fill_gradient("Count", high = "goldenrod2", low = "lightgoldenrod2") +
  labs(x = "Site Missingness", y = "Frequency") +
  theme_bw()
# load libraries
library(ggplot2)
library(readxl)
library(RcppCNPy) # for reading .npy (NUMPY) files
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3
# load datasets
# PCA.cov.npy is the covariance matrix without outlier individuals filtered, n=133
# PCA.filtered.cov.npy is the covariance matrix with outlier individuals filtered, # n = 119
# PCA.climate.cov.npy is the covariance matrix with individuals from PCA.filtered.cov.npy who are missing climate data removed
covmatrix <- npyLoad("PCA.cov.npy")
covmatrixfiltered <- npyLoad("PCA.filtered.cov.npy")
climcovmatrix <- npyLoad("PCA.climate.cov.npy")
# eigendecompose the filtered covariance matrix ONCE and reuse the result
# (the original called eigen() separately for values and vectors, repeating
# an O(n^3) decomposition)
eig <- eigen(covmatrixfiltered)
# eigenvalues
Eigenvalues <- eig$values
# eigenvectors
Eigenvectors <- eig$vectors
# calculate the principal components using a matrix multiplication
PC <- as.matrix(covmatrixfiltered) %*% Eigenvectors
# transpose the data in order to get the loading of the variables for each principal component
PCs <- t(PC)
# calculate the proportions of the variation explained by the various components (%)
print(round(Eigenvalues/sum(Eigenvalues) * 100, digits = 2))
## [1] 3.18 1.23 1.19 1.09 1.05 1.04 1.03 1.00 0.98 0.97 0.96 0.95 0.95 0.94
## [15] 0.94 0.93 0.92 0.92 0.91 0.90 0.90 0.88 0.88 0.88 0.87 0.86 0.86 0.86
## [29] 0.85 0.84 0.84 0.84 0.83 0.83 0.83 0.83 0.83 0.83 0.83 0.83 0.83 0.82
## [43] 0.82 0.82 0.82 0.82 0.82 0.82 0.82 0.82 0.82 0.81 0.81 0.81 0.81 0.81
## [57] 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.80
## [71] 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80
## [85] 0.80 0.80 0.80 0.80 0.79 0.79 0.79 0.79 0.79 0.79 0.79 0.79 0.79 0.79
## [99] 0.79 0.79 0.78 0.78 0.78 0.78 0.78 0.77 0.76 0.75 0.68 0.68 0.65 0.64
## [113] 0.64 0.61 0.59 0.58 0.58 0.57 0.22
# cumulative percentage of the variation explained by the components
round(cumsum(Eigenvalues)/sum(Eigenvalues) * 100, digits = 2)
## [1] 3.18 4.41 5.60 6.69 7.74 8.78 9.81 10.81 11.79 12.76
## [11] 13.73 14.68 15.63 16.57 17.51 18.44 19.37 20.29 21.19 22.09
## [21] 22.99 23.87 24.75 25.63 26.50 27.36 28.22 29.08 29.93 30.77
## [31] 31.61 32.45 33.28 34.11 34.94 35.77 36.60 37.43 38.26 39.08
## [41] 39.91 40.73 41.56 42.38 43.20 44.02 44.84 45.66 46.47 47.29
## [51] 48.10 48.92 49.73 50.55 51.36 52.17 52.98 53.79 54.60 55.41
## [61] 56.22 57.03 57.84 58.65 59.46 60.26 61.07 61.88 62.68 63.49
## [71] 64.29 65.09 65.90 66.70 67.50 68.30 69.10 69.91 70.71 71.51
## [81] 72.30 73.10 73.90 74.70 75.50 76.30 77.09 77.89 78.68 79.48
## [91] 80.27 81.06 81.85 82.64 83.43 84.22 85.01 85.79 86.58 87.36
## [101] 88.15 88.93 89.71 90.49 91.27 92.04 92.80 93.56 94.24 94.92
## [111] 95.57 96.21 96.85 97.46 98.05 98.63 99.21 99.78 100.00
# extract the scores on the first three principal components for each
# individual, used below to create the PCA plots
PC1 <- PC[,1]
PC2 <- PC[,2]
PC3 <- PC[,3]
# collect into a data frame; these values were copied into the spreadsheet
# PCA-metadata.xlsx as its principal-component columns
PCA <- data.frame(PC1, PC2, PC3)
# create a biplot by hand
# load libraries
library(readxl)
# load dataset
biplot.pca.metadata <- read_excel("Biplot Metadata.xlsx")
biplot.metadata <- read_excel("PCA-metadata.xlsx")
# point shapes grouped by state of collection, in spreadsheet row order:
# NSW, NT, QLD, SA, VIC, WA, NA
pch.group <- rep(c(21, 22, 23, 24, 3, 25, 4),
                 times = c(10, 10, 10, 43, 1, 43, 2))
# red = NSW, gold = NT, green = QLD, blue = SA, black = VIC, pink = WA, black X = N.A
col.group <- rep(c("red", "gold", "green", "skyblue2", "black", "violet", "grey"),
                 times = c(10, 10, 10, 43, 1, 43, 2))
# scatter the individuals in principal component space, with shape and
# colour giving the state of collection
plot(x = biplot.pca.metadata$PC1, y = biplot.pca.metadata$PC2,
     xlab = "PC1 (3.18%)", ylab = "PC2 (1.23%)",
     xlim = c(-1.0, 1.0), ylim = c(-0.6, 0.6),
     pch = pch.group, col = "black", bg = col.group,
     cex = 2, las = 1, asp = 1)
# dashed grey axes through the origin
abline(v = 0, lty = 2, col = "grey50")
abline(h = 0, lty = 2, col = "grey50")
# x and y co-ordinates of the variables, equal to the loading scores of the
# variables (i.e. the individual samples); these form the arrows on the
# biplot and come from the transposed principal component matrix (PCs)
l.x <- PCs[, 1]
l.y <- PCs[, 2]
arrows(x0 = 0, x1 = l.x, y0 = 0, y1 = l.y, col = "red", length = 0.1, lwd = 1.5)
# text label positions: below the point ("1") for negative loadings,
# above the point ("3") for positive loadings
l.pos <- replace(replace(l.y, which(l.y < 0), "1"), which(l.y > 0), "3")
text(l.x, l.y, labels = biplot.pca.metadata$`SAMPLE ID`, col = "red", pos = l.pos)
# load the new data
PCA <- read_excel("PCA-metadata.xlsx")
# plot principal components with the collection date and geographic
# location (state) metadata
ggplot(PCA, aes(x=PC1, y=PC2)) + geom_point(aes(shape = Period, color = STATE), position = "jitter", size = 2.5) + scale_shape_manual(values = c(3, 4, 8, 15, 17, 18, 19, 25)) + theme_bw()
# plot principal components with the collection date and individual
# missingness metadata
ggplot (PCA, aes(x=PC1, y=PC2)) + geom_point(aes(shape = Period, color = MISSINGNESS), position = "jitter", size = 2.5) + scale_shape_manual(values = c(3, 4, 8, 15, 17, 18, 19, 25)) + scale_color_continuous(high = "black", low = "turquoise2") + theme_bw()
# plot samples by latitude and longitude on a scale of principal
# component values
australia <- st_read("australia.shp")
## Reading layer `australia' from data source `/Users/Elroy/Documents/University/Honours 2019/Everything/BIOINFOMATICS/australia.shp' using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 59 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 112.9194 ymin: -43.64129 xmax: 153.6306 ymax: -9.240167
## epsg (SRID): NA
## proj4string: NA
# base map of Australia drawn from the shapefile
a <- ggplot() + geom_sf(data = australia, color = "black", fill = "white")
# overlay the sampling locations coloured by their PC1 score
a + geom_point(data=PCA, aes(x=LONGITUDE, y=LATITUDE, color = PC1), size = 2.5) + scale_color_gradient2()
## Warning: Removed 5 rows containing missing values (geom_point).
# NOTE(review): the warning above indicates 5 samples lack coordinates
# load libraries
library(ggplot2)
library(readxl)
library(tidyverse) # for correlation matrix
## ── Attaching packages ────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.1.3 ✔ purrr 0.3.2
## ✔ tidyr 1.0.0 ✔ dplyr 0.8.3
## ✔ readr 1.3.1 ✔ stringr 1.4.0
## ✔ tibble 2.1.3 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# load the new data
PCA <- read_excel("PCA-metadata.xlsx")
# linear regression of PC1 values on individual missingness: tests whether
# the first PC axis is driven by data quality (missingness) rather than
# biological signal
regression <- lm(PC1~MISSINGNESS, data = PCA)
summary(regression)
##
## Call:
## lm(formula = PC1 ~ MISSINGNESS, data = PCA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6637 -0.2794 -0.1337 0.3312 0.8308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.08118 0.06700 -1.212 0.228
## MISSINGNESS 0.84386 0.52296 1.614 0.109
##
## Residual standard error: 0.3809 on 117 degrees of freedom
## Multiple R-squared: 0.02177, Adjusted R-squared: 0.01341
## F-statistic: 2.604 on 1 and 117 DF, p-value: 0.1093
# calculate the Pearson's correlation coefficient between individual
# missingness and PC1 score
cMiss <- PCA$MISSINGNESS
cPC1 <- PCA$PC1
cor.test(cMiss, cPC1, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: cMiss and cPC1
## t = 1.6136, df = 117, p-value = 0.1093
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03333433 0.31906832
## sample estimates:
## cor
## 0.1475465
# create a correlation matrix between the leading PCs and the
# spatio-temporal metadata; pairwise-complete observations are used so
# rows with partially missing metadata still contribute
cor(PCA %>% select(PC1, PC2, Year, LATITUDE, LONGITUDE), use="pairwise.complete.obs")
## PC1 PC2 Year LATITUDE LONGITUDE
## PC1 1.000000000 -0.008453158 -0.3630122 -0.5775262 0.6651945
## PC2 -0.008453158 1.000000000 0.1581540 0.6096030 0.2317461
## Year -0.363012228 0.158153981 1.0000000 0.4100755 -0.3038095
## LATITUDE -0.577526165 0.609603032 0.4100755 1.0000000 -0.3364011
## LONGITUDE 0.665194486 0.231746097 -0.3038095 -0.3364011 1.0000000
# scatter plot of PC1 against individual missingness with a fitted
# linear-regression line
ggplot(PCA, aes(x = MISSINGNESS, y = PC1)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw()
# load libraries
library(RColorBrewer)
# load the data
# Admix-Pop-Data.txt: one row per individual with population metadata;
# column 2 is used below for ordering and labelling
pop <- read.table("Admix-Pop-Data.txt", fill = TRUE, header = FALSE)
# admix.K.txt: per-individual admixture proportions, one column per cluster
# (presumably NGSadmix/ADMIXTURE output -- confirm)
q2 <- read.table("admix.2.txt", fill = TRUE, header = FALSE)
q3 <- read.table("admix.3.txt", fill = TRUE, header = FALSE)
q4 <- read.table("admix.4.txt", fill = TRUE, header = FALSE)
q5 <- read.table("admix.5.txt", fill = TRUE, header = FALSE)
q6 <- read.table("admix.6.txt", fill = TRUE, header = FALSE)
# order individuals by population (column 2) so the barplots group them
ord <- order (pop[,2])
# plot admixture for k: stacked barplot of per-individual admixture
# proportions for K = 2 (col = 2:10 supplies more colours than the two
# clusters need; only the first two are used)
barplot(t(q2)[,ord],col=2:10, space=0, border=NA, xlab="Individuals", ylab="AdmixtureProportions(K=2)")
# plot admixture for k using color brewer scheme
barplot(t(q3)[,ord],col=brewer.pal(n=3, name="RdBu"),
space=0, border=NA, xlab="Individuals",
ylab="Admixture Proportions (K=3)")
# add individual population labels below the axis, centred on each group
text(tapply(1:nrow(pop), pop[ord,2], mean),-0.05, unique(pop[ord,2]),xpd=T)
# add lines between each individual and population
# NOTE(review): the first abline groups on pop[ord,1] but the second on
# pop[ord,2] -- confirm the column mismatch is intentional
abline(v=cumsum(sapply(unique(pop[ord,1]), function(x){sum(pop[ord,1]==x)})), col="white",lwd=0.5)
abline(v=cumsum(sapply(unique(pop[ord,2]), function(x){sum(pop[ord,2]==x)})), col=1,lwd=2)
# load libraries
library(ggplot2)
library(ggrepel)
library(scatterpie)
library(sf)
library(readxl)
admixture <- read_excel("admix-metadata.xlsx")
# load shapefile of Australia
australia <- st_read("australia.shp")
## Reading layer `australia' from data source `/Users/Elroy/Documents/University/Honours 2019/Everything/BIOINFOMATICS/australia.shp' using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 59 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 112.9194 ymin: -43.64129 xmax: 153.6306 ymax: -9.240167
## epsg (SRID): NA
## proj4string: NA
# create a vector plot of Australia
a <- ggplot() + geom_sf(data = australia, color = "black", fill = "white")
# shared pie-slice palette, enough colours for up to K = 6 clusters
admix.cols <- c("#F39C12", "#C39BD3", "#82E0AA", "#F7DC6F", "#EC7063", "#85C1E9")
# helper: draw one pie per sampling location whose slices are the admixture
# proportions held in the named columns of `admixture`; replaces six
# copy-pasted blocks that repeatedly clobbered the globals Lat/Long/K1..K6
plot_admix_pies <- function(k.columns) {
  slice.names <- paste0("K", seq_along(k.columns))
  pie.data <- data.frame(Lat = admixture$LATITUDE,
                         Long = admixture$LONGITUDE,
                         setNames(lapply(k.columns,
                                         function(col) admixture[[col]]),
                                  slice.names))
  a + geom_scatterpie(aes(x = Long, y = Lat), data = pie.data,
                      cols = slice.names) +
    scale_fill_manual(values = admix.cols) +
    theme_light()
}
# K = 2
plot_admix_pies(c("K2.1", "K2.2"))
# K = 3
plot_admix_pies(c("K3.1", "K3.2", "K3.3"))
# K = 4
plot_admix_pies(c("K4.1", "K4.2", "K4.3", "K4.4"))
# K = 5
plot_admix_pies(c("K5.1", "K5.2", "K5.3", "K5.4", "K5.5"))
# K = 6
# NOTE(review): these columns are named K1..K6 in the spreadsheet, unlike
# the K<k>.<cluster> pattern used for K = 2..5 -- confirm this is intentional
plot_admix_pies(c("K1", "K2", "K3", "K4", "K5", "K6"))
First, estimate slopes for the change in each climate variable over time; there is a separate file for each climate variable. YearJD is the January–December year used for winter variables; YearJJ is the July–June year used for summer variables. Slopes are based on the 20 years of data prior to the capture year. The year of capture was removed because the number of months each sample was exposed to in that year is variable (e.g. it could be less than one month if the sample was caught in January).
#### CHANGE IN MEAN TEMPERATURE ####
# Per-sample rate of change in mean temperature: fit a linear model of
# Tmean on (mean-centred) YearJJ for each sample, then collect the slope
# statistics into a single table written to CSV.
Change.Tmean <- read.csv("Tmean.ForChange.csv", header = TRUE)
summary(Change.Tmean)
## SAMPLE.ID YearJJ Tmean
## ANWC100: 20 Min. :1968 Min. :11.12
## ANWC101: 20 1st Qu.:1988 1st Qu.:19.38
## ANWC102: 20 Median :1994 Median :22.14
## ANWC103: 20 Mean :1994 Mean :22.54
## ANWC104: 20 3rd Qu.:2000 3rd Qu.:26.73
## ANWC105: 20 Max. :2014 Max. :31.89
## (Other):2140
# these are missing from this data set: "SAM9","SAM23", "SAM24", "SAM22", "SAM10", "SAM7"
sample.list <- c("ANWC100","ANWC101","ANWC102","ANWC103","ANWC104","ANWC105","ANWC106","ANWC107","ANWC108","ANWC109","ANWC110","ANWC112","ANWC113","ANWC115","ANWC116", "ANWC117","ANWC118","ANWC119","ANWC126","ANWC128","ANWC129","ANWC130","ANWC131","ANWC132","ANWC133","ANWC134","ANWC135","ANWC136","ANWC137","ANWC138","ANWC139","ANWC140","ANWC141","ANWC142","ANWC143","ANWC144","ANWC145","ANWC146","ANWC147","ANWC148","ANWC150","ANWC151","ANWC152","ANWC153","ANWC154","ANWC155","ANWC156","ANWC157","ANWC158","ANWC159","ANWC160","ANWC161","ANWC162","ANWC163","ANWC168","ANWC170","ANWC66","ANWC67","ANWC68","ANWC69","ANWC71","ANWC72","ANWC73","ANWC75","ANWC77","ANWC78","ANWC79","ANWC80","ANWC81","ANWC84","ANWC85","ANWC88","ANWC90","ANWC92","ANWC94","SAM11","SAM12","SAM14","SAM15","SAM16","SAM17","SAM18","SAM19","SAM20","SAM21","SAM25","SAM26","SAM27","SAM28","SAM29", "SAM30","SAM31","SAM32","SAM33","SAM34","SAM35","SAM36","SAM37","SAM38","SAM39","SAM41", "SAM44","SAM45","SAM46","SAM8","WAM55","WAM56","WAM57","WAM58","WAM59","WAM60","WAM63","WAM64")
# build one row of model statistics per sample; rows are collected in a
# pre-allocated list and bound once at the end (the previous version grew a
# data frame with rbind() inside the loop and used cbind() with a character
# first column, which coerced every numeric statistic to character; the
# no-op line names(sample.list) <- c("sample") is also removed)
model.rows <- vector("list", length(sample.list))
for (i in seq_along(sample.list)) {
  sample <- sample.list[i]
  # slope of Tmean over the (centred) July-June year for this sample
  sample.m1 <- lm(Tmean ~ scale(YearJJ, scale = FALSE),
                  data = subset(Change.Tmean, SAMPLE.ID == sample))
  m1.summary <- summary(sample.m1)
  model.rows[[i]] <- data.frame(
    sample = sample,
    intercept = unname(sample.m1$coefficients[1]),
    coefYear = unname(sample.m1$coefficients[2]),
    df.residual = sample.m1$df.residual,
    adj.r.square = m1.summary$adj.r.squared,
    df = m1.summary$df[2],
    fstat = unname(m1.summary$fstatistic[1]),
    pvalue = m1.summary$coefficients[2, 4],
    stringsAsFactors = FALSE
  )
}
Model.sample <- do.call(rbind, model.rows)
# number rows sequentially
rownames(Model.sample) <- seq_len(nrow(Model.sample))
write.csv(Model.sample, "ChangeMeanTemp.csv")
#### CHANGE IN NO. DAYS OVER 35 ####
# Per-sample rate of change in the number of days over 35C: fit a linear
# model of NDays35 on (mean-centred) YearJJ for each sample, then collect
# the slope statistics into a single table written to CSV.
Change.Ndays35<- read.csv("NDays35.ForChange.csv", header = TRUE)
summary(Change.Ndays35)
## SAMPLE.ID YearJJ NDays35
## ANWC100: 20 Min. :1968 Min. : 0.00
## ANWC101: 20 1st Qu.:1988 1st Qu.: 43.00
## ANWC102: 20 Median :1994 Median : 84.00
## ANWC103: 20 Mean :1993 Mean : 94.56
## ANWC104: 20 3rd Qu.:2000 3rd Qu.:138.00
## ANWC105: 20 Max. :2014 Max. :284.00
## (Other):2140
# these are missing from this data set: "SAM9","SAM23", "SAM24", "SAM22", "SAM10", "SAM7"
sample.list <- c("ANWC100","ANWC101","ANWC102","ANWC103","ANWC104","ANWC105","ANWC106","ANWC107","ANWC108","ANWC109","ANWC110","ANWC112","ANWC113","ANWC115","ANWC116", "ANWC117","ANWC118","ANWC119","ANWC126","ANWC128","ANWC129","ANWC130","ANWC131","ANWC132","ANWC133","ANWC134","ANWC135","ANWC136","ANWC137","ANWC138","ANWC139","ANWC140","ANWC141","ANWC142","ANWC143","ANWC144","ANWC145","ANWC146","ANWC147","ANWC148","ANWC150","ANWC151","ANWC152","ANWC153","ANWC154","ANWC155","ANWC156","ANWC157","ANWC158","ANWC159","ANWC160","ANWC161","ANWC162","ANWC163","ANWC168","ANWC170","ANWC66","ANWC67","ANWC68","ANWC69","ANWC71","ANWC72","ANWC73","ANWC75","ANWC77","ANWC78","ANWC79","ANWC80","ANWC81","ANWC84","ANWC85","ANWC88","ANWC90","ANWC92","ANWC94","SAM11","SAM12","SAM14","SAM15","SAM16","SAM17","SAM18","SAM19","SAM20","SAM21","SAM25","SAM26","SAM27","SAM28","SAM29", "SAM30","SAM31","SAM32","SAM33","SAM34","SAM35","SAM36","SAM37","SAM38","SAM39","SAM41", "SAM44","SAM45","SAM46","SAM8","WAM55","WAM56","WAM57","WAM58","WAM59","WAM60","WAM63","WAM64")
# one row of model statistics per sample; pre-allocated list bound once at
# the end (avoids growing a data frame in the loop and keeps the numeric
# columns numeric -- the previous cbind() coerced them all to character)
model.rows <- vector("list", length(sample.list))
for (i in seq_along(sample.list)) {
  sample <- sample.list[i]
  # slope of NDays35 over the (centred) July-June year for this sample
  sample.m1 <- lm(NDays35 ~ scale(YearJJ, scale = FALSE),
                  data = subset(Change.Ndays35, SAMPLE.ID == sample))
  m1.summary <- summary(sample.m1)
  model.rows[[i]] <- data.frame(
    sample = sample,
    intercept = unname(sample.m1$coefficients[1]),
    coefYear = unname(sample.m1$coefficients[2]),
    df.residual = sample.m1$df.residual,
    adj.r.square = m1.summary$adj.r.squared,
    df = m1.summary$df[2],
    fstat = unname(m1.summary$fstatistic[1]),
    pvalue = m1.summary$coefficients[2, 4],
    stringsAsFactors = FALSE
  )
}
Model.sample <- do.call(rbind, model.rows)
# number rows sequentially
rownames(Model.sample) <- seq_len(nrow(Model.sample))
write.csv(Model.sample, "ChangeNdays35.csv")
#### CHANGE IN NO. DAYS UNDER 5 ####
# Per-sample rate of change in the number of days under 5C: fit a linear
# model of Ndays5 on (mean-centred) YearJD for each sample, then collect
# the slope statistics into a single table written to CSV.
Change.Ndays5 <- read.csv("NDays5.ForChange.csv", header = TRUE)
summary(Change.Ndays5)
## SAMPLE.ID YearJD Ndays5
## ANWC100: 20 Min. :1974 Min. : 0.00
## ANWC101: 20 1st Qu.:1989 1st Qu.: 0.00
## ANWC102: 20 Median :1994 Median : 27.00
## ANWC103: 20 Mean :1994 Mean : 27.51
## ANWC104: 20 3rd Qu.:2000 3rd Qu.: 50.00
## ANWC105: 20 Max. :2014 Max. :139.00
## (Other):2020
# these are missing from this data set: "SAM9","SAM23", "SAM24", "SAM22", "SAM10", "SAM7", "WAM55", "ANWC84", "ANWC85", "ANWC88", "ANWC90","ANWC92"
sample.list <- c("ANWC100","ANWC101","ANWC102","ANWC103","ANWC104","ANWC105","ANWC106","ANWC107","ANWC108","ANWC109","ANWC110","ANWC112","ANWC113","ANWC115","ANWC116", "ANWC117","ANWC118","ANWC119","ANWC126","ANWC128","ANWC129","ANWC130","ANWC131","ANWC132","ANWC133","ANWC134","ANWC135","ANWC136","ANWC137", "ANWC138","ANWC139", "ANWC140","ANWC141","ANWC142","ANWC143","ANWC144","ANWC145","ANWC146","ANWC147", "ANWC148","ANWC150","ANWC151","ANWC152","ANWC153","ANWC154","ANWC155","ANWC156","ANWC157", "ANWC158","ANWC159","ANWC160","ANWC161","ANWC162","ANWC163","ANWC168","ANWC170","ANWC66", "ANWC67", "ANWC68", "ANWC69", "ANWC71", "ANWC72", "ANWC73", "ANWC75", "ANWC77", "ANWC78","ANWC79", "ANWC80", "ANWC81", "ANWC94","SAM11", "SAM12", "SAM14", "SAM15", "SAM16", "SAM17", "SAM18", "SAM19", "SAM20", "SAM21","SAM25", "SAM26", "SAM27", "SAM28", "SAM29", "SAM30", "SAM31", "SAM32", "SAM33", "SAM34","SAM35", "SAM36", "SAM37", "SAM38", "SAM39", "SAM41", "SAM44", "SAM45", "SAM46", "SAM8", "WAM56", "WAM57", "WAM58", "WAM59", "WAM60", "WAM63", "WAM64")
# one row of model statistics per sample; pre-allocated list bound once at
# the end (avoids growing a data frame in the loop and keeps the numeric
# columns numeric -- the previous cbind() coerced them all to character)
model.rows <- vector("list", length(sample.list))
for (i in seq_along(sample.list)) {
  sample <- sample.list[i]
  # slope of Ndays5 over the (centred) January-December year for this sample
  sample.m1 <- lm(Ndays5 ~ scale(YearJD, scale = FALSE),
                  data = subset(Change.Ndays5, SAMPLE.ID == sample))
  m1.summary <- summary(sample.m1)
  model.rows[[i]] <- data.frame(
    sample = sample,
    intercept = unname(sample.m1$coefficients[1]),
    coefYear = unname(sample.m1$coefficients[2]),
    df.residual = sample.m1$df.residual,
    adj.r.square = m1.summary$adj.r.squared,
    df = m1.summary$df[2],
    fstat = unname(m1.summary$fstatistic[1]),
    pvalue = m1.summary$coefficients[2, 4],
    stringsAsFactors = FALSE
  )
}
Model.sample <- do.call(rbind, model.rows)
# number rows sequentially
rownames(Model.sample) <- seq_len(nrow(Model.sample))
write.csv(Model.sample, "ChangeNdays5.csv")
#### CHANGE IN PRECIPITATION ####
# Per-sample rate of change in precipitation: fit a linear model of RainSum
# on (mean-centred) YearJD for each sample, then collect the slope
# statistics into a single table written to CSV.
Change.Rain <- read.csv("Rain.ForChange.csv", header = TRUE)
summary(Change.Rain)
## SAMPLE.ID YearJD RainSum
## ANWC119: 20 Min. :1968 Min. :0.0000355
## ANWC84 : 20 1st Qu.:1988 1st Qu.:0.0067233
## ANWC85 : 20 Median :1994 Median :0.0105657
## SAM41 : 20 Mean :1994 Mean :0.0142994
## ANWC100: 19 3rd Qu.:2000 3rd Qu.:0.0195285
## ANWC101: 19 Max. :2014 Max. :0.0679789
## (Other):2033
# these are missing from this data set: "SAM9","SAM23", "SAM24", "SAM22", "SAM10", "SAM7"
sample.list <- c("ANWC100","ANWC101","ANWC102","ANWC103","ANWC104","ANWC105","ANWC106","ANWC107","ANWC108","ANWC109","ANWC110","ANWC112","ANWC113","ANWC115","ANWC116", "ANWC117","ANWC118","ANWC119","ANWC126","ANWC128","ANWC129","ANWC130","ANWC131","ANWC132","ANWC133","ANWC134","ANWC135","ANWC136","ANWC137","ANWC138","ANWC139","ANWC140","ANWC141","ANWC142","ANWC143","ANWC144","ANWC145","ANWC146","ANWC147","ANWC148","ANWC150","ANWC151","ANWC152","ANWC153","ANWC154","ANWC155","ANWC156","ANWC157","ANWC158","ANWC159","ANWC160","ANWC161","ANWC162","ANWC163","ANWC168","ANWC170","ANWC66","ANWC67","ANWC68","ANWC69","ANWC71","ANWC72","ANWC73","ANWC75","ANWC77","ANWC78","ANWC79","ANWC80","ANWC81","ANWC84","ANWC85","ANWC88","ANWC90","ANWC92","ANWC94","SAM11","SAM12","SAM14","SAM15","SAM16","SAM17","SAM18","SAM19","SAM20","SAM21","SAM25","SAM26","SAM27","SAM28","SAM29", "SAM30","SAM31","SAM32","SAM33","SAM34","SAM35","SAM36","SAM37","SAM38","SAM39","SAM41", "SAM44","SAM45","SAM46","SAM8","WAM55","WAM56","WAM57","WAM58","WAM59","WAM60","WAM63","WAM64")
# one row of model statistics per sample; pre-allocated list bound once at
# the end (avoids growing a data frame in the loop and keeps the numeric
# columns numeric -- the previous cbind() coerced them all to character)
model.rows <- vector("list", length(sample.list))
for (i in seq_along(sample.list)) {
  sample <- sample.list[i]
  # slope of RainSum over the (centred) January-December year for this sample
  sample.m1 <- lm(RainSum ~ scale(YearJD, scale = FALSE),
                  data = subset(Change.Rain, SAMPLE.ID == sample))
  m1.summary <- summary(sample.m1)
  model.rows[[i]] <- data.frame(
    sample = sample,
    intercept = unname(sample.m1$coefficients[1]),
    coefYear = unname(sample.m1$coefficients[2]),
    df.residual = sample.m1$df.residual,
    adj.r.square = m1.summary$adj.r.squared,
    df = m1.summary$df[2],
    fstat = unname(m1.summary$fstatistic[1]),
    pvalue = m1.summary$coefficients[2, 4],
    stringsAsFactors = FALSE
  )
}
Model.sample <- do.call(rbind, model.rows)
# number rows sequentially
rownames(Model.sample) <- seq_len(nrow(Model.sample))
write.csv(Model.sample, "ChangeRain.csv")
# Test for correlations between the climate variables
# import the data
climate.corr <- read.csv("ElroyCorDataSummary.csv", header = TRUE)
# correlation matrix over the variable columns (2:17), using
# pairwise-complete observations, written to a new csv file
climate.cor.matrix <- cor(climate.corr[, 2:17], use = "pairwise.complete.obs")
write.csv(climate.cor.matrix, "ElroyClimateCorMatrix.csv")
# how the distance matrix was calculated
# load libraries
library(geosphere) # to create the distance matrix
library(readxl)
# load data
metadata <- read_excel("climate-metadata.xlsx")
longitude <- metadata$LONGITUDE
latitude <- metadata$LATITUDE
# coordinate matrix with one (longitude, latitude) row per sample
coordmatrix <- as.matrix(data.frame(longitude, latitude))
# pairwise distances in metres using the shortest (geodesic) distance
# between two points, distGeo, from the geosphere package
distance <- distm(coordmatrix, fun = distGeo)
library(readxl)
library(ecodist)
# Load datasets
# CLIMATE DATA
# tropical + temperate individuals
climate.var <- read_excel("ClimVarMod.xlsx")
# tropical + temperate individuals - no missing data
nmd.clim.var <- read_excel("ClimVarMod-NoMissingData.xlsx")
# temperate only individuals
temp.climate.var <- read_excel("TemperateClimVarMod.xlsx")
# temperate only individuals - no missing data
nmd.temp.climate.var <- read_excel("TemperateClimVarMod-NoMissingData.xlsx")
# helper: read a whitespace-delimited matrix file and return it as a matrix
# (replaces eight repeated read.table()/as.matrix() pairs)
read.matrix <- function(path) as.matrix(read.table(path))
# GENETIC MATRIX
# tropical + temperate individuals
G <- read.matrix("covariance-matrix.txt")
# tropical + temperate individuals - no missing data
NG <- read.matrix("covariance-matrix-nmd.txt")
# temperate only individuals
T.G <- read.matrix("temperate-covariance-matrix.txt")
# temperate only individuals - no missing data
N.T.G <- read.matrix("covariance-matrix-nmd-temp.txt")
# DISTANCE MATRIX
# tropical + temperate individuals
D <- read.matrix("distance-matrix.txt")
# tropical + temperate individuals - no missing data
ND <- read.matrix("distance-matrix-nmd.txt")
# temperate only individuals
T.D <- read.matrix("temperate-distance-matrix.txt")
# temperate only individuals - no missing data
N.T.D <- read.matrix("distance-matrix-nmd-temp.txt")
# INBREEDING MODEL
# tropical + temperate
# multiple regression on distance matrices (ecodist::MRM): pairwise
# inbreeding distance modelled against geographic distance, genetic
# covariance, climate-variable distances and year, 500 permutations
# NOTE(review): the "NAs introduced by coercion" warnings below suggest
# ScaledNdays5five / ScaledRainfive are read as character columns --
# verify the spreadsheet column types
MRM(dist(INBREEDING) ~ as.dist(D) + as.dist(G) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=climate.var, nperm=500)
## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## Warning in dist(ScaledRainfive): NAs introduced by coercion
## $coef
## dist(INBREEDING) pval
## Int 9.304673e-02 0.068
## as.dist(D) -1.885499e-08 0.094
## as.dist(G) -1.502411e-01 0.106
## dist(ScaledNdays35five) -3.133070e-03 0.662
## dist(ScaledNdays5five) 2.553644e-02 0.010
## dist(ScaledRainfive) -3.210679e-03 0.640
## dist(Year) -1.649857e-03 0.104
##
## $r.squared
## R2 pval
## 0.04690912 0.02600000
##
## $F.test
## F F.pval
## 44.73085 0.02600
# tropical + temperate - no missing data
# same inbreeding MRM as above, but on the no-missing-data matrices and
# climate variables
MRM(dist(INBREEDING) ~ as.dist(ND) + as.dist(NG) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=nmd.clim.var, nperm=500)
## $coef
## dist(INBREEDING) pval
## Int 9.283213e-02 0.072
## as.dist(ND) -1.866818e-08 0.092
## as.dist(NG) -1.412163e-01 0.152
## dist(ScaledNdays35five) -3.164813e-03 0.642
## dist(ScaledNdays5five) 2.553907e-02 0.022
## dist(ScaledRainfive) -3.258472e-03 0.652
## dist(Year) -1.642770e-03 0.124
##
## $r.squared
## R2 pval
## 0.04676811 0.04000000
##
## $F.test
## F F.pval
## 44.5898 0.0400
# temperate only
# inbreeding MRM restricted to the temperate individuals
MRM(dist(INBREEDING) ~ as.dist(T.D) + as.dist(T.G) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=temp.climate.var, nperm=500)
## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## $coef
## dist(INBREEDING) pval
## Int 1.103537e-01 0.428
## as.dist(T.D) 2.833225e-09 0.760
## as.dist(T.G) -7.405639e-02 0.182
## dist(ScaledNdays35five) -1.592697e-02 0.120
## dist(ScaledNdays5five) 2.089003e-02 0.056
## dist(ScaledRainfive) -1.622833e-02 0.166
## dist(Year) -1.210449e-03 0.226
##
## $r.squared
## R2 pval
## 0.03172831 0.18800000
##
## $F.test
## F F.pval
## 13.15088 0.18800
# GENETICS MODEL
# tropical + temperate
# MRM of pairwise genetic covariance against geographic distance,
# climate-variable distances and year, 500 permutations
MRM(as.dist(G) ~ as.dist(D) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=climate.var, nperm=500)
## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## $coef
## as.dist(G) pval
## Int 2.609565e-02 0.002
## as.dist(D) -2.114681e-08 0.002
## dist(ScaledNdays35five) -3.199908e-03 0.002
## dist(ScaledNdays5five) -4.261939e-04 0.174
## dist(ScaledRainfive) 1.937645e-03 0.004
## dist(Year) -4.798280e-04 0.002
##
## $r.squared
## R2 pval
## 0.3018008 0.0020000
##
## $F.test
## F F.pval
## 471.5049 0.0020
# tropical + temperate - no missing data
# same genetics MRM on the no-missing-data matrices and climate variables
MRM(as.dist(NG) ~ as.dist(ND) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=nmd.clim.var, nperm=500)
## $coef
## as.dist(NG) pval
## Int 2.624368e-02 0.002
## as.dist(ND) -2.117537e-08 0.002
## dist(ScaledNdays35five) -3.629192e-03 0.002
## dist(ScaledNdays5five) -4.347884e-04 0.210
## dist(ScaledRainfive) 1.723037e-03 0.002
## dist(Year) -4.603036e-04 0.002
##
## $r.squared
## R2 pval
## 0.3017216 0.0020000
##
## $F.test
## F F.pval
## 471.3277 0.0020
# temperate only
# genetics MRM restricted to the temperate individuals
MRM(as.dist(T.G) ~ as.dist(T.D) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=temp.climate.var, nperm=500)
## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## $coef
## as.dist(T.G) pval
## Int 9.412562e-03 0.782
## as.dist(T.D) -2.359639e-09 0.014
## dist(ScaledNdays35five) -1.758287e-02 0.002
## dist(ScaledNdays5five) -5.800697e-04 0.338
## dist(ScaledRainfive) -1.962407e-03 0.020
## dist(Year) -8.118931e-04 0.004
##
## $r.squared
## R2 pval
## 0.07501759 0.00200000
##
## $F.test
## F F.pval
## 39.07477 0.00200
# temperate only - no missing data
# genetics MRM for temperate individuals with no missing data
MRM(as.dist(N.T.G) ~ as.dist(N.T.D) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=nmd.temp.climate.var, nperm=500)
## $coef
## as.dist(N.T.G) pval
## Int 1.534757e-02 0.002
## as.dist(N.T.D) -2.030517e-08 0.002
## dist(ScaledNdays35five) -8.872619e-03 0.004
## dist(ScaledNdays5five) 4.213988e-03 0.002
## dist(ScaledRainfive) 3.628750e-03 0.002
## dist(Year) -5.967811e-04 0.014
##
## $r.squared
## R2 pval
## 0.1934525 0.0020000
##
## $F.test
## F F.pval
## 115.561 0.002
# TEMPORAL INBREEDING MODEL
# load libraries
library(sommer)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: lattice
## Loading required package: crayon
##
## Attaching package: 'crayon'
## The following object is masked from 'package:ggplot2':
##
## %+%
library(readxl)
# load datasets
# these datasets have no missing data for any metadata or variables
# CLIMATE DATA
# tropical + temperate
nmd.clim.var <- read_excel("ClimVarMod-NoMissingData.xlsx")
# temperate only
nmd.temp.clim.var <- read_excel("TemperateClimVarMod-NoMissingData.xlsx")
# GENETIC MATRIX
# tropical + temperate
nmd.matrix <- read.table("covariance-matrix-nmd.txt")
G <- as.matrix(nmd.matrix)
# temperate only
nmd.temp.matrix <- read.table("covariance-matrix-nmd-temp.txt")
T.G <- as.matrix(nmd.temp.matrix)
# MIXED MODEL IN SOMMER
# tropical + temperate - PRIMARY
# mixed model: inbreeding regressed on interactions between scaled climate
# change and climate extreme variables, with a genomic random effect
# (vs(ID, Gu=G) uses the genetic covariance matrix) plus Year and IBRA
# (presumably bioregion -- confirm) as additional random terms
inbreed.model <- mmer(fixed = INBREEDING ~ ScaledChangeMeanTemp * ScaledNdays35five + ScaledChangeMeanTemp * ScaledNdays5five + ScaledRainfive * ScaledChangeRain, random = ~ vs(ID, Gu=G) + Year + IBRA, rcov= ~ units, data=nmd.clim.var)
## iteration LogLik wall cpu(sec) restrained
## 1 -45.6366 22:19:43 0 2
## 2 -42.621 22:19:43 0 2
## 3 -42.0242 22:19:43 0 2
## 4 -40.9728 22:19:43 0 2
## 5 -40.3969 22:19:43 0 2
## 6 -40.3299 22:19:43 0 2
## 7 -40.3285 22:19:43 0 2
## 8 -40.3285 22:19:43 0 2
# tropical + temperate - COLD
inbreed.model2 <- mmer(fixed = INBREEDING ~ ScaledChangeMeanTemp * ScaledNdays5five + ScaledRainfive * ScaledChangeRain, random = ~ vs(ID, Gu=G) + Year + IBRA, rcov= ~ units, data=nmd.clim.var)
## iteration LogLik wall cpu(sec) restrained
## 1 -46.6681 22:19:43 0 2
## 2 -43.2365 22:19:43 0 2
## 3 -42.6376 22:19:43 0 2
## 4 -41.562 22:19:43 0 2
## 5 -40.902 22:19:43 0 2
## 6 -40.8039 22:19:43 0 2
## 7 -40.8016 22:19:43 0 2
## 8 -40.8016 22:19:43 0 2
# tropical + temperate - HOT
inbreed.model3 <- mmer(fixed = INBREEDING ~ ScaledChangeMeanTemp * ScaledNdays35five + ScaledRainfive * ScaledChangeRain, random = ~ vs(ID, Gu=G) + Year + IBRA, rcov= ~ units, data=nmd.clim.var)
## iteration LogLik wall cpu(sec) restrained
## 1 -50.3615 22:19:43 0 2
## 2 -46.3847 22:19:43 0 2
## 3 -45.7831 22:19:43 0 2
## 4 -44.6763 22:19:43 0 2
## 5 -43.8759 22:19:43 0 3
## 6 -43.7136 22:19:43 0 3
## 7 -43.7136 22:19:43 0 3
# TEMPORAL GENETICS MODEL
# vegan provides adonis2() (PERMANOVA) used below.
library(vegan)
## Loading required package: permute
## This is vegan 2.5-5
##
## Attaching package: 'vegan'
## The following object is masked from 'package:ecodist':
##
## mantel
library(readxl)
# load datasets
# these datasets have no missing data for any metadata or variables
# CLIMATE DATA
# tropical + temperate
nmd.clim.var <- read_excel("ClimVarMod-NoMissingData.xlsx")
#temperate only
nmd.temp.clim.var <- read_excel("TemperateClimVarMod-NoMissingData.xlsx")
# GENETIC MATRIX
# tropical + temperate
nmd.matrix <- read.table("covariance-matrix-nmd.txt")
G <- as.matrix(nmd.matrix)
# G is already a matrix, so this is effectively a copy of G that can be
# truncated below without altering G itself
AG <- as.matrix(G)
# convert any negative values to a zero as adonis2 doesn't like negatives
AG[AG < 0] <- 0
# temperate only
# NOTE(review): T.G is not used by the adonis2 models below — presumably
# needed by a later chunk; confirm before removing.
nmd.temp.matrix <- read.table("covariance-matrix-nmd-temp.txt")
T.G <- as.matrix(nmd.temp.matrix)
# MIXED MODEL USING ADONIS
# tropical + temperate
# Three candidate models: both temperature-interaction sets (model1), hot-day
# only (model2), cold-day only (model3); compared by AICc further below.
temporal.model1 <- adonis2(AG ~ ScaledChangeMeanTemp*ScaledNdays35five + ScaledChangeMeanTemp*ScaledNdays5five + ScaledRainfive*ScaledChangeRain + Year + IBRA, data = nmd.clim.var)
temporal.model2 <- adonis2(AG ~ ScaledChangeMeanTemp*ScaledNdays35five + ScaledRainfive*ScaledChangeRain + Year + IBRA, data = nmd.clim.var)
temporal.model3 <- adonis2(AG ~ ScaledChangeMeanTemp*ScaledNdays5five + ScaledRainfive*ScaledChangeRain + Year + IBRA, data = nmd.clim.var)
# Calculate AICs for adonis models
# Script source: https://github.com/kdyson/R_Scripts/blob/master/AICc_PERMANOVA.R
# Compute AIC-family information criteria for a vegan::adonis2 fit.
# adonis2 does not return a likelihood, so several approximations based on
# the residual sum of squares (RSS) and mean squared error (MSE) of the
# PERMANOVA table are computed instead.
#
# adonis2.model: output of vegan::adonis2 (a table with SumOfSqs and Df
#   columns whose last two rows are "Residual" and "Total").
# Returns a list: AIC, AIC.g, AICc, AIC.MSE, AICc.MSE, AIC.pi, AICc.pi and
#   k (the number of fitted parameters, including the intercept).
AICc.PERMANOVA2 <- function(adonis2.model) {
    # check to see if object is an adonis2 model: it carries SumOfSqs and Df
    # columns. Test is.null() first — is.na(NULL[1]) yields logical(0), and a
    # bare if() on that errors before the intended stop() message is reached.
    if (is.null(adonis2.model$SumOfSqs) || is.na(adonis2.model$SumOfSqs[1]))
        stop("object not output of adonis2 {vegan} ")
    # Table rows are: one per model term, then "Residual", then "Total", so
    # the residual row is second-to-last and the total row is last.
    n.rows <- length(adonis2.model$Df)
    RSS <- adonis2.model$SumOfSqs[n.rows - 1]
    MSE <- RSS / adonis2.model$Df[n.rows - 1]
    # Total Df = nn - 1 observations, hence nn = total Df + 1.
    nn <- adonis2.model$Df[n.rows] + 1
    # k = parameters estimated (incl. intercept) = nn - residual Df.
    k <- nn - adonis2.model$Df[n.rows - 1]
    # AIC : 2*k + n*ln(RSS)
    # AICc: AIC + [2k(k+1)]/(n-k-1)
    # based on https://en.wikipedia.org/wiki/Akaike_information_criterion;
    # https://www.researchgate.net/post/What_is_the_AIC_formula;
    # http://avesbiodiv.mncn.csic.es/estadistica/ejemploaic.pdf
    # AIC.g is generalized version of AIC = 2k + n [Ln( 2(pi) RSS/n ) + 1]
    # AIC.pi = k + n [Ln( 2(pi) RSS/(n-k) ) +1]
    AIC <- 2 * k + nn * log(RSS)
    AIC.g <- 2 * k + nn * (1 + log(2 * pi * RSS / nn))
    AIC.MSE <- 2 * k + nn * log(MSE)
    AIC.pi <- k + nn * (1 + log(2 * pi * RSS / (nn - k)))
    # Small-sample correction term, shared by all AICc variants.
    correction <- (2 * k * (k + 1)) / (nn - k - 1)
    AICc <- AIC + correction
    AICc.MSE <- AIC.MSE + correction
    AICc.pi <- AIC.pi + correction
    list("AIC" = AIC, "AIC.g" = AIC.g, "AICc" = AICc,
         "AIC.MSE" = AIC.MSE, "AICc.MSE" = AICc.MSE,
         "AIC.pi" = AIC.pi, "AICc.pi" = AICc.pi, "k" = k)
}
# Compare the three candidate adonis2 models by information criteria.
# PRIMARY model (both hot- and cold-day interactions)
AICc.PERMANOVA2(adonis2.model = temporal.model1)
## $AIC
## [1] 380.5788
##
## $AIC.g
## [1] 189.89
##
## $AICc
## [1] 431.8288
##
## $AIC.MSE
## [1] -57.73189
##
## $AICc.MSE
## [1] -6.481886
##
## $AIC.pi
## [1] 200.2452
##
## $AICc.pi
## [1] 251.4952
##
## $k
## [1] 40
# HOT model (35-degree-day interaction only)
AICc.PERMANOVA2(adonis2.model = temporal.model2)
## $AIC
## [1] 382.0189
##
## $AIC.g
## [1] 191.3301
##
## $AICc
## [1] 426.9279
##
## $AIC.MSE
## [1] -59.47387
##
## $AICc.MSE
## [1] -14.56478
##
## $AIC.pi
## [1] 200.5032
##
## $AICc.pi
## [1] 245.4123
##
## $k
## [1] 38
# COLD model (5-degree-day interaction only)
AICc.PERMANOVA2(adonis2.model = temporal.model3)
## $AIC
## [1] 382.1366
##
## $AIC.g
## [1] 191.4478
##
## $AICc
## [1] 427.0457
##
## $AIC.MSE
## [1] -59.35614
##
## $AICc.MSE
## [1] -14.44704
##
## $AIC.pi
## [1] 245.53 is NOT this line — see below
## [1] 200.621
##
## $AICc.pi
## [1] 245.53
##
## $k
## [1] 38
# NOTE(review): temporal.model2 has the lowest AICc (426.9 vs 431.8 and
# 427.0), so the hot-day model is favoured by this criterion.
# Calculate correlation coefficients to tell directions of trends
# (adonis2 reports significance but not sign, so Pearson correlations on the
# unfolded pairwise vectors give the direction of each relationship)
# Load datasets
distance <- read.table("distance-matrix.txt")
D <- as.matrix(distance)
nmd.distance <- read.table("distance-matrix-nmd.txt")
ND <- as.matrix(nmd.distance)
nmd.genetic <- read.table("covariance-matrix-nmd.txt")
NG <- as.matrix(nmd.genetic)
nmd.clim.var <- read_excel("ClimVarMod-NoMissingData.xlsx")
# convert negative covariances to zero, as was done for the adonis2 models
NG2 <- as.matrix(nmd.genetic)
NG2[NG2 < 0] <- 0
# Load variables as distance based: lower-triangle pairwise differences,
# unfolded to plain numeric vectors so cor.test() can use them
covariance <- as.numeric(as.dist(NG))
distance <- as.numeric(as.dist(ND))
year <- as.numeric(dist(nmd.clim.var$Year))
mean.temp <- as.numeric(dist(nmd.clim.var$ScaledChangeMeanTemp))
ndasy35 <- as.numeric(dist(nmd.clim.var$ScaledNdays35five))
ndasy5 <- as.numeric(dist(nmd.clim.var$ScaledNdays5five))
covariance.zero <- as.numeric(as.dist(NG2))
# interaction terms, mirroring the adonis2 model formulas
mean.temp.35 <- mean.temp * ndasy35
mean.temp.5 <- mean.temp * ndasy5
# NOTE(review): an earlier `rain <- dist(ScaledChangeRain)` assignment was
# dead code — it duplicated rain.change and was overwritten below before any
# use — so it has been removed.
rain.change <- as.numeric(dist(nmd.clim.var$ScaledChangeRain))
rain <- as.numeric(dist(nmd.clim.var$ScaledRainfive))
rain.change.rain <- rain.change * rain
# calculate pearson's correlation co-efficients
# All tests use the zero-truncated covariance (covariance.zero) so the
# response matches what adonis2 analysed; every estimate below is negative,
# i.e. genetic covariance declines as each pairwise difference grows.
# vs difference in collection year
cor.test(covariance.zero, year, method=c("pearson"))
##
## Pearson's product-moment correlation
##
## data: covariance.zero and year
## t = -10.969, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1727214 -0.1208133
## sample estimates:
## cor
## -0.1468685
# vs mean-temperature-change x hot-day interaction
cor.test(covariance.zero, mean.temp.35, method=c("pearson"))
##
## Pearson's product-moment correlation
##
## data: covariance.zero and mean.temp.35
## t = -12.345, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1905046 -0.1388929
## sample estimates:
## cor
## -0.1648116
# vs difference in rate of mean temperature change
cor.test(covariance.zero, mean.temp, method=c("pearson"))
##
## Pearson's product-moment correlation
##
## data: covariance.zero and mean.temp
## t = -5.5136, df = 5458, p-value = 3.678e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.10075044 -0.04799239
## sample estimates:
## cor
## -0.0744235
# vs difference in hot-day counts
cor.test(covariance.zero, ndasy35, method=c("pearson"))
##
## Pearson's product-moment correlation
##
## data: covariance.zero and ndasy35
## t = -18.705, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2702083 -0.2203504
## sample estimates:
## cor
## -0.2454417
# vs difference in cold-day counts
cor.test(covariance.zero, ndasy5, method=c("pearson"))
##
## Pearson's product-moment correlation
##
## data: covariance.zero and ndasy5
## t = -12.666, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1946294 -0.1430914
## sample estimates:
## cor
## -0.1689759
# vs mean-temperature-change x cold-day interaction
cor.test(covariance.zero, mean.temp.5, method=c("pearson"))
##
## Pearson's product-moment correlation
##
## data: covariance.zero and mean.temp.5
## t = -9.2869, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.15075133 -0.09852435
## sample estimates:
## cor
## -0.1247242
# vs difference in rate of rainfall change
cor.test(covariance.zero, rain.change, method=c("pearson"))
##
## Pearson's product-moment correlation
##
## data: covariance.zero and rain.change
## t = -14.461, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2175136 -0.1664183
## sample estimates:
## cor
## -0.1920961
# vs difference in five-year rainfall
cor.test(covariance.zero, rain, method=c("pearson"))
##
## Pearson's product-moment correlation
##
## data: covariance.zero and rain
## t = -14.989, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2241750 -0.1732193
## sample estimates:
## cor
## -0.1988315
# vs rainfall x rainfall-change interaction
cor.test(covariance.zero, rain.change.rain, method=c("pearson"))
##
## Pearson's product-moment correlation
##
## data: covariance.zero and rain.change.rain
## t = -12.25, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1892881 -0.1376551
## sample estimates:
## cor
## -0.1635836
# Load libraries
library(ggplot2)
library(readxl)
# Load datasets
distance <- read.table("distance-matrix.txt")
D <- as.matrix(distance)
genmatrix <- read.table("covariance-matrix.txt")
G <- as.matrix(genmatrix)
climate.var <- read_excel("ClimVarMod.xlsx")
# Build the data frame for the hexplots: one row per pair of samples, with
# their genetic covariance plus the pairwise differences in collection year,
# rainfall change, mean-temperature change, and spatial distance
covariance <- as.numeric(as.dist(G))
distance <- as.numeric(as.dist(D))
year <- as.numeric(dist(climate.var$Year))
rain <- as.numeric(dist(climate.var$ScaledChangeRain))
mean.temp <- as.numeric(dist(climate.var$ScaledChangeMeanTemp))
hex.data <- data.frame(year, covariance, rain, mean.temp, distance)
# Plot
# Genetic covariance vs pairwise temporal distance between samples
ggplot(hex.data, aes(covariance, year)) +
  geom_hex() +
  labs(x = "Genetic Covariance", y = "Age Difference (Years)") +
  scale_fill_gradient(low = "gray25", high = "royalblue3") +
  theme_bw()
# Genetic covariance vs pairwise spatial distance between samples
ggplot(hex.data, aes(covariance, distance)) +
  geom_hex() +
  labs(x = "Genetic Covariance", y = "Distance between Individuals (m)") +
  scale_fill_gradient(low = "gray25", high = "royalblue3") +
  theme_bw()
# Genetic covariance vs pairwise difference in rate of mean temperature
# change
ggplot(hex.data, aes(covariance, mean.temp)) +
  geom_hex() +
  labs(x = "Genetic Covariance", y = "Difference in Rate of Mean Temperature Change (°C)") +
  theme_bw()
# Genetic covariance and pairwise difference in rate of change in annual
# rainfall